3  bulkTCRseq: Percentage of significantly expanding clones across patients

3.1 Set up workspace

# Libraries
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

3.2 Open clones that significantly expanded after vaccination

file_list <- list.files(".")
file_list <- file_list[str_detect(file_list, "pval_postvax_vs_prevax_Part4.csv")]
pt_name <- str_split_i(file_list, "_", 1)

# Read in csvs
SE_vax.list <- lapply(file_list, read.csv)
# Rename
names(SE_vax.list) <- pt_name

# Reformat data
for(i in c(1:length(SE_vax.list))){
  # Filter for significant clones
  SE_vax.list[[i]] <- SE_vax.list[[i]] %>% 
    filter(sig == "Sig") %>%
  # Add a column for Patient
    mutate(Patient = toupper(names(SE_vax.list[i]))) %>%
  # Select important columns
    select(Beta_clonotype, sig, Patient) %>%
    # Rename sig
    dplyr::rename("SE_vax" = "sig")
}

3.3 Open clones that significantly expanded after nivolumab

file_list <- list.files(".")
file_list <- file_list[str_detect(file_list, "pval_prevax_vs_pretreatment_Part4.csv")]
pt_name <- str_split_i(file_list, "_", 1)

# Read in csvs
SE_nivo.list <- lapply(file_list, read.csv)
# Rename
names(SE_nivo.list) <- pt_name

# Reformat data
for(i in c(1:length(SE_nivo.list))){
  # Filter for significant clones
  SE_nivo.list[[i]] <- SE_nivo.list[[i]] %>% 
    filter(sig == "Sig") %>%
  # Add a column for Patient
    mutate(Patient = toupper(names(SE_nivo.list[i]))) %>%
  # Select important columns
    select(Beta_clonotype, sig, Patient) %>%
    # Rename sig
    dplyr::rename("SE_nivo" = "sig")
}

3.4 Open Existing, de novo Post-Nivo and Post-Vax clones

file_list <- list.files(".")
file_list <- file_list[str_detect(file_list, "merged_typed_full_Part3.csv")]
pt_name <- str_split_i(file_list, "_", 1)

# Read in csvs
typed_clones.list <- lapply(file_list, read.csv)
# Rename list
names(typed_clones.list) <- pt_name

# Reformat data
for(i in c(1:length(typed_clones.list))){
  # Filter for non-Other clones
  typed_clones.list[[i]] <- typed_clones.list[[i]] %>% 
    filter(Type != "Other") %>%
    # Select important columns 
    select(Beta_clonotype, Type, Patient, max_log2fc_pool_vs_dmso, ends_with("prevax"), ends_with("postvax"))
  colnames(typed_clones.list[[i]])[5] <- "prevax"
  colnames(typed_clones.list[[i]])[7] <- "postvax"
}

3.5 Open IMP-expanded clones

file_list <- list.files(".")
file_list <- file_list[str_detect(file_list, "betas_merged_typed_imp_expanded_Part3.csv")]
pt_name <- str_split_i(file_list, "_", 1)

# Read in csvs
imp_clones.list <- lapply(file_list, read.csv)
# Rename list
names(imp_clones.list) <- pt_name

# Reformat data
for(i in c(1:length(imp_clones.list))){
  imp_clones.list[[i]] <- imp_clones.list[[i]] %>% 
    # Select important columns 
    select(Beta_clonotype, Type, Patient, expanded_local_min)
}

3.6 Merge lists into a large dataframe

SE_vax.df <- do.call(rbind, SE_vax.list)
SE_nivo.df <- do.call(rbind, SE_nivo.list)
typed_clones.df <- do.call(rbind, typed_clones.list)
imp_clones.df <- do.call(rbind, imp_clones.list)

clones.df <- SE_vax.df %>%
  full_join(SE_nivo.df) %>%
  full_join(typed_clones.df) %>%
  full_join(imp_clones.df) %>%
  mutate(SE_nivo = case_when(SE_nivo == "Sig" ~ "SE_nivo",
                                      is.na(SE_nivo) ~ "Non_SE_nivo"),
         SE_vax = case_when(SE_vax == "Sig" ~ "SE_vax",
                                      is.na(SE_vax) ~ "Non_SE_vax"),
         Type = case_when(is.na(Type) ~ "Other",
                          T ~ Type),
         IMP_expanded = case_when(expanded_local_min == TRUE ~ "IMP-expanded",
                                        is.na(expanded_local_min) & Type == "Other" ~ "NA",
                                        T ~ "Non_IMP-expanded"),
         SE = case_when(SE_vax == "Non_SE_vax" & SE_nivo == "Non_SE_nivo" ~ "Non SE",
                        SE_vax == "Non_SE_vax" & SE_nivo == "SE_nivo" ~ "SE nivo",
                        SE_vax == "SE_vax" & SE_nivo == "Non_SE_nivo" ~ "SE vax",
                        SE_vax == "SE_vax" & SE_nivo == "SE_nivo" ~ "SE nivo + vax"),
         SE_general = case_when(SE == "Non SE" ~ "Non SE",
                                T ~ "SE")) %>%
  select(-expanded_local_min, -max_log2fc_pool_vs_dmso)
Joining with `by = join_by(Beta_clonotype, Patient)`
Joining with `by = join_by(Beta_clonotype, Patient)`
Joining with `by = join_by(Beta_clonotype, Patient, Type)`

3.7 Plot the number of existing, post-nivolumab and post-vaccine clones that are also significantly expanding after nivolumab or vaccination for Fig 3C top

clones_sum <- clones.df %>%
  filter(Type %in% c("Existing", "Post-Nivolumab", "Post-Vaccine")) %>%
  mutate(Type = factor(Type)) %>%
  group_by(SE, SE_general, Patient, Type) %>%
  dplyr::count()

plot <- clones_sum %>%
  ggplot(aes(y = n, x = Type, fill = SE_general)) +
  geom_col(position = "fill", size = 0.2, width = 0.7, color = "black") +
  theme_classic() +
  facet_wrap(~Patient, ncol = 9) +
  scale_fill_manual(values = c("white", "black"),
                    name = "Category") +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        axis.title.x = element_blank()) +
  ylab("Proportion of clones") +
  annotate("rect", col = "grey70", fill = "grey70", size = 1,
        xmin = as.numeric(unique(clones_sum$Type)[[1]]) - 0.3,
        xmax = as.numeric(unique(clones_sum$Type)[[1]]) + 0.3,
        ymin = -0.05, ymax = -0.15) +
  annotate("rect", col = "darkgoldenrod3", fill = "darkgoldenrod3", size = 1,
        xmin = as.numeric(unique(clones_sum$Type)[[2]]) - 0.3,
        xmax = as.numeric(unique(clones_sum$Type)[[2]]) + 0.3,
        ymin = -0.05, ymax = -0.15) +
  annotate("rect", col = "#0072B2", fill = "#0072B2", size = 1,
        xmin = as.numeric(unique(clones_sum$Type)[[3]]) - 0.3,
        xmax = as.numeric(unique(clones_sum$Type)[[3]]) + 0.3,
        ymin = -0.05, ymax = -0.15) +
  scale_y_continuous(expand = c(0, 0.06))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
plot

3.8 Plot the number of IMP expanded clones are also significantly expanding after vaccination for Fig 3H left

imp_clones_sum <- clones.df %>%
  filter(Type %in% c("Existing", "Post-Nivolumab", "Post-Vaccine")) %>%
  group_by(SE_vax, Patient, IMP_expanded) %>%
  dplyr::count() 

filt_imp_clones_sum <- imp_clones_sum %>%
  filter(IMP_expanded == "IMP-expanded") %>%
  mutate(Patient = factor(Patient, levels = c("P101", "P104", "P109", "P108", "P105", "P111", "P106", "P110", "P103")),
         SE_vax = case_when(SE_vax == "Non_SE_vax" ~ "Non SE after vaccine",
                            SE_vax == "SE_vax" ~ "SE after vaccine"))

plot <- filt_imp_clones_sum %>%
  ggplot(aes(y = Patient, x = n, fill = SE_vax)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = c("white", "black"), name = "Category") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        axis.ticks.y = element_blank()) +
  xlab("Proportion of\nIMP expanded clones\nthat are SE after vaccine") +
  coord_cartesian(xlim = c(0, 1)) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[1]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[1]]) + 0.44) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[2]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[2]]) + 0.44) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[3]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[3]]) + 0.44) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[4]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[4]]) + 0.44) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[5]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[5]]) + 0.44) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[6]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[6]]) + 0.44) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[7]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[7]]) + 0.44) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[8]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[8]]) + 0.44) +
  annotate("rect", col = "#920000FF", alpha = 0, size = 0.8,
        xmin = -0.01, xmax = 1.01,
        ymin = as.numeric(unique(filt_imp_clones_sum$Patient)[[9]]) - 0.44, 
        ymax = as.numeric(unique(filt_imp_clones_sum$Patient)[[9]]) + 0.44)

plot

3.9 Get session info

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] lubridate_1.9.4 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
 [5] purrr_1.0.4     readr_2.1.5     tidyr_1.3.1     tibble_3.2.1   
 [9] ggplot2_3.5.1   tidyverse_2.0.0

loaded via a namespace (and not attached):
 [1] gtable_0.3.6      jsonlite_1.8.9    compiler_4.3.2    tidyselect_1.2.1 
 [5] scales_1.3.0      fastmap_1.2.0     R6_2.6.1          labeling_0.4.3   
 [9] generics_0.1.3    knitr_1.49        htmlwidgets_1.6.4 munsell_0.5.1    
[13] pillar_1.10.1     tzdb_0.5.0        rlang_1.1.5       stringi_1.8.4    
[17] xfun_0.50         timechange_0.3.0  cli_3.6.3         withr_3.0.2      
[21] magrittr_2.0.3    digest_0.6.37     grid_4.3.2        rstudioapi_0.17.1
[25] hms_1.1.3         lifecycle_1.0.4   vctrs_0.6.5       evaluate_1.0.1   
[29] glue_1.8.0        farver_2.1.2      colorspace_2.1-1  rmarkdown_2.29   
[33] tools_4.3.2       pkgconfig_2.0.3   htmltools_0.5.8.1